This analysis consists of four major parts (three of which are essential to its interpretation): 1) the base QC and data correction (based on appropriate model choice) 2) DEA and enrichment analysis (annotation) 3) Dissection of GEX variance in gene sets (delineation of status effect) 4) Networking approach: co-regulations (propagation of status effect on rest of transcriptome)
Analysis of monocyte data (SCZ vs CTR): most advanced model
Plotting the first QC results:
Density plot
Sample-sample correlation
Covariate correlation
#Covariate correlation
for (i in colnames(imputed.covs))
imputed.covs[,i] <- as.numeric(as.character(imputed.covs[,i]))
MEs_lin(imputed.covs,imputed.covs)#PC-covariate correlation (nomal p)
PCA_cov_cor_R(imputed.covs, filt.df)Next step: QC on vst-normalized expression values.
for (i in c("status", "smoking", "coffee", "sex", "batch", "drugs", "antipsychotics", "antidepressive", "antianxiety"))
imputed.covs[,i] <- as.factor(imputed.covs[,i])
pca <- plotPCA.custom(as.matrix(trnsf.df), intgroup=c("status", "batch", "sex", "smoking"),
ntop = 50000, returnData=TRUE, metadata=imputed.covs, pc.1 = 1, pc.2 = 2)
PoV <- round(100 * attr(pca, "percentVar"))
PCAplot(pca, "status", PoV.df=PoV, pc.1 = 1, pc.2 = 2) PCAplot(pca.clean, "status", PoV.df=PoV.clean, pc.1 = 1, pc.2 = 2)#Check influence of covariates on data variance after transformation
plotVarPart(vp)print("The model used is:")[1] “The model used is:”
dds@design~Mean.Per.Base.Cov. + Mapped + Exonic.Rate + Genes.Detected + rRNA.rate + RIN + BMI + age_clusters + batch + sex + status
#PC-covariate correlation (nominal p)
PCA_cov_cor_R(clean_covs, batch.rem)PCAplot(pca.cor, "status", PoV.df=PoV.cor, pc.1 = 1, pc.2 = 2)We then go into the differential gene-expression analysis:
out of 14416 with nonzero total read count adjusted p-value < 0.1 LFC > 0 (up) : 41, 0.28% LFC < 0 (down) : 58, 0.4% outliers [1] : 0, 0% low counts [2] : 0, 0% (mean count < 49) [1] see ‘cooksCutoff’ argument of ?results [2] see ‘independentFiltering’ argument of ?results
As well as the number of differentially expressed genes at lfc </> -/+0.5 at padj < 0.1:
[1] 28
#DT::renderDT(data.frame(res.complex), "OH1.5A",scrollY=1000)
createDT(data.frame(res), "Status", scrollY=1000)EnhancedVolcano(res,
lab = rownames(res),
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.1,
FCcutoff = 0.5,
labSize = 6,
xlim = c(-2.5,2.5),
ylim = c(-0.5,2),
legendPosition = 'right')#Plotting DEGs
ComplexHeatmap::pheatmap(batch.rem[rownames(batch.rem) %in% upreg.genesALL,], scale= "row",
cluster_rows = T, cluster_cols = T, annotation_legend = T, show_colnames = F, show_rownames = T,
legend = T, treeheight_row = 0, color = heatmap.color.code,
annotation_col=annot_df, annotation_colors = annot_colors, fontsize_row = 8)ComplexHeatmap::pheatmap(batch.rem[rownames(batch.rem) %in% downreg.genesALL,], scale= "row",
cluster_rows = T, cluster_cols = T, annotation_legend = T, show_colnames = F, show_rownames = T,
legend = T, treeheight_row = 0, color = heatmap.color.code,
annotation_col=annot_df, annotation_colors = annot_colors, fontsize_row = 8)Making meaning of our DEGs
dotplot(GO.up, split="ONTOLOGY") + facet_grid(ONTOLOGY~., scale="free")print("No GO-term enrichments for downregulated genes")[1] “No GO-term enrichments for downregulated genes”
#DT::renderDT(data.frame(res.complex), "OH1.5A",scrollY=1000)
paste("Reminder: we have", length(upreg.genesALL), "upregulated and", length(downreg.genesALL), "downregulated genes.")[1] “Reminder: we have 41 upregulated and 58 downregulated genes.”
createDT(DEG.enrich.res, "Enrichment", scrollY=1000)ComplexHeatmap::pheatmap(DEG.enrich, cluster_rows = F, cluster_cols = F, annotation_legend = F, show_colnames = T,
legend_breaks= c(0,-log(0.05), 10, 20, 20),
legend = T, color = colorRampPalette(RColorBrewer::brewer.pal(6,"RdPu"))(30),
legend_labels=c("0","2.995", "10", "20","-log2(q) \n\n\n"))ComplexHeatmap::pheatmap(overlap.genesets, cluster_rows = F, cluster_cols = F)So we select the NFKB, LPS, and chronic/acute GC stimulation gene sets, as well as the DEX 2.5/5nM stimulation
gene.sets <- c("NfKB", "LPS", "acute_GC", "chronic_GC", "DEX_2.5nM", "DEX_5nM")
for (i in gene.sets){
panel <- panel.list[[i]]
DE <- DE.list[[i]]
df <- panel[DE$log2FoldChange < -0.5 | DE$log2FoldChange > 0.5,]
df <- df[,order(as.numeric(colnames(df)), method="radix", decreasing=F)]
ComplexHeatmap::pheatmap(df, scale= "row",
cluster_rows = T, cluster_cols = T, annotation_legend = T, show_colnames = F, show_rownames = T,
legend = T, treeheight_row = 0, color = heatmap.color.code,
annotation_col=annot_df, annotation_colors = annot_colors, fontsize_row = 8, column_title=paste("DEGs within the", i, "list"))
}(1A) Compound median lFC^2 for each signature
#mean lFC to point directions
compoundlFC <- median(DE.list[[3]][order(DE.list[[3]]$log2FoldChange^2, decreasing=T),][1:10,]$log2FoldChange)
for (i in gene.sets[-1]){
top10 <- DE.list[[i]][order(DE.list[[i]]$log2FoldChange^2, decreasing=T),][1:10,]
compoundlFC <- rbind(compoundlFC,median(top10$log2FoldChange))}
rownames(compoundlFC) <- gene.sets
ComplexHeatmap::pheatmap(compoundlFC^2, color = colorRampPalette(RColorBrewer::brewer.pal(6,"RdPu"))(30),
cluster_rows = F)for (i in gene.sets){
dat <- batch.rem[rownames(batch.rem) %in% na.omit(gene.panels_mrgd[,i]),]
# plots <- PCA_cov_cor_R(clean_covs, dat) #if
list.pca <- plotPCA.custom(as.matrix(dat), intgroup=c("status", "batch", "sex", "RIN"),
ntop = 50000, returnData=TRUE, metadata=clean_covs, pc.1=1, pc.2=2)
list.pov <- round(100 * attr(list.pca, "percentVar"))
plot <- PCAplot(list.pca, "status", PoV.df=list.pov, pc.1=1, pc.2=2)
print(plot)}(2A) Variance causa testing - does GEX variance come really from status?
PC-expression t-test
PC.boxplots <- ggarrange(plotlist=scatters[names(scatters) %in% gene.sets], common.legend = T)
PC.boxplotsGEX.stat.cor <- GEX.stat.cor[rownames(GEX.stat.cor) %in% gene.sets,]
matrix_pvalue <- matrix_pvalue[rownames(matrix_pvalue) %in% gene.sets,]
ComplexHeatmap::pheatmap(cbind(GEX.stat.cor[,1]), cluster_rows = F, cluster_cols = T, annotation_legend = F, show_colnames = T,
legend_breaks= c(-0.6,-0.3,0,0.3,0.6,0.6),
legend = T, color = heatmap.color.code,
legend_labels=c("-0.6","-0.3","0","0.3","0.6","Spearman rho \n\n\n"),
display_numbers = as.matrix(matrix_pvalue[,1]), column_title="Status-GEX variance correlation")
ComplexHeatmap::pheatmap(cbind(GEX.stat.cor[,3]), cluster_rows = F, cluster_cols = T, annotation_legend = F, show_colnames = T,
legend_breaks= c(-3.5,-2.5,-1,0,1,2.5,2.5),
legend = T, color = heatmap.color.code,
legend_labels=c("-3.5","-2.5","-1","0","1","2.5","beta \n\n\n"),
display_numbers = as.matrix(matrix_pvalue[,2]), column_title="GEX variance prediction based on status (beta effect size)")#Compiling a new list of scatter plots
#Compile a list of the usable plots:
DEG.NfKB <- list()
names(DEGscatters$DEG)[1] “DEG” “DEG.up” “DEG.down” “Microglia”
[5] “Astrocytes” “NfKB” “IFNy” “LPS”
[9] “IL4” “PBMC_GC” “chronic_GC” “acute_GC”
[13] “DEX_2.5nM” “DEX_5nM” “Schizophrenia” “Manual.selection”
for (i in names(DEGscatters)[1:3])
DEG.NfKB <- append(DEG.NfKB, DEGscatters[[i]]["NfKB"])
#1) co-reg between NfKB and DEGs
DEG.NfKB.compoundScatter <- ggarrange(plotlist=DEG.NfKB[1:length(DEG.NfKB)], common.legend = T)
DEG.NfKB.compoundScatter (2) Gene program-Gene program interaction based on PCs
#2) co-regs between NfKB and all other gene sets
all.NfKB <- DEGscatters[["NfKB"]][!names(DEGscatters[[i]]) %in% c("NfKB","DEG", "DEG.up", "DEG.down")]
all.NfKB.compoundScatter <- ggarrange(plotlist=all.NfKB[1:length(all.NfKB)])
all.NfKB.compoundScatterComplexHeatmap::pheatmap(rcorr(as.matrix(cbind(DEGscatter.df,"Status"=scatter.df[,1])), type="spearman")$r,
color = heatmap.color.code, fontsize_row = 8, fontsize_col = 8)ComplexHeatmap::pheatmap(z, cluster_rows = T, cluster_cols = T, annotation_legend = F, show_colnames = T,
legend_breaks= c(-2,-1,0,1,2,2),
legend = T, color = heatmap.color.code,
legend_labels=c("-2","-1","0","1","2","z-score \n\n\n"),
display_numbers = P)